


#
import requests
from bs4 import BeautifulSoup
import json

# Download Ctrip hotel reviews page by page.
# Hotel page: https://hotels.ctrip.com/hotels/60182899.html#ibu_hotel_review
# Review API: https://m.ctrip.com/restapi/soa2/21881/json/GetReviewList?testab=bd38fbe4ce8642f254b0f60450fe3bba83586e2cecfe84937ab0673f72f611a3
def _build_review_headers():
    """Return the HTTP headers sent with every review-list request.

    The headers make the request look as if it was issued by a browser.
    """
    # The HTTP/2 pseudo-headers (authority/method/path/scheme) and the
    # hard-coded Content-Length that used to be listed here were dropped:
    # as ordinary headers the former carry no meaning, and requests computes
    # Content-Length from the actual JSON body.
    return {
        'Accept': 'application/json',
        # Only advertise encodings requests can always decode; "br"/"zstd"
        # need optional packages and would otherwise leave the body undecoded.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Content-Type': 'application/json;charset=UTF-8',
        # NOTE(security): this cookie appears to embed live session
        # credentials (cticket, login_uid). Prefer loading it from the
        # environment or an untracked file instead of committing it.
        'Cookie': """MKT_CKID=1688730950125.25cnl.406v; GUID=09031155110937841563; _RSG=iy4r.zzjWGEvjKAx_aWEg8; _RDG=288b60020a8a1125091af262f8e2ce855e; _RGUID=e420b7df-7ed4-4f2c-8a8e-824ffb294b79; UBT_VID=1688730950066.2ds6m2; nfes_isSupportWebP=1; __zpspc=9.4.1691928734.1691928734.1%232%7Cwww.baidu.com%7C%7C%7C%7C%23; FlightIntl=Search=[%22LHW|%E5%85%B0%E5%B7%9E(LHW)|100|LHW|480%22%2C%22CTU|%E6%88%90%E9%83%BD(CTU)|28|CTU|480%22%2C%222024-06-07%22]; _abtest_userid=c9aea6b1-355e-4cfd-8566-1725fbfb706b; _gid=GA1.2.1320630178.1723976153; _RF1=112.45.96.62; _lizard_LZ=AdKeULGBNOYRMaWfcQSbTZXFHCVPDJEI-sx3glh0n8t6ovi+yj1uwprk9mq2z547; ibu_h5_site=CN; ibu_h5_group=ctrip; ibu_h5_local=zh-cn; ibu_h5_local=zh-cn; ibu_h5_lang=zhcn; ibu_h5_curr=CNY; ibulanguage=CN; ibulocale=zh_cn; cookiePricesDisplayed=CNY; Hm_lvt_a8d6737197d542432f4ff4abc6e06384=1723977040; _ga=GA1.1.1854649260.1723976153; MKT_Pagesource=PC; _ga_9BZF483VNQ=GS1.1.1723977040.1.0.1723977044.0.0.0; _ga_5DVRDQD429=GS1.1.1723976153.1.1.1723977044.0.0.0; _ga_B77BES1Z8Z=GS1.1.1723976153.1.1.1723977044.56.0.0; intl_ht1=h4=2_60182899,1_375126; cticket=C19D220C5764EFF8B08E6248BBF0CCC0149FC1D42B07DB0EDAA682C418769DCE; login_type=0; login_uid=AC2E95D5B1413D98D8D6EE0B86ADD8DE780246B329A274BD9F8D4BA3DDA50E33; DUID=u=AA50287A50B381E9C6DA11F68782B6F7&v=0; IsNonUser=F; AHeadUserInfo=VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=%C1%E8%C3%F4&NoReadMessageCount=0; Session=smartlinkcode=U1535&smartlinklanguage=zh&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=; Union=AllianceID=1315&SID=1535&OUID=&createtime=1723977172&Expires=1724581971965; librauuid=; _bfa=1.1688730950066.2ds6m2.1.1723977174049.1724035534584.56.1.102003; _jzqco=%7C%7C%7C%7C1723976989448%7C1.1415885115.1723976989262.1723977174559.1724035534912.1723977174559.1724035534912.0.0.0.13.13""",
        'Origin': 'https://hotels.ctrip.com',
        'P': '42196778658',
        'Priority': 'u = 1, i',
        'Referer': 'https://hotels.ctrip.com/',
        # NOTE(review): the quoting of this value looks truncated compared
        # with what a browser sends; left as-is, confirm against a capture.
        'Sec-Ch-Ua': 'Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126',
        'Sec-Ch-Ua-Platform': 'Windows',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
    }


def _build_review_payload(page_no):
    """Return the JSON body of a GetReviewList request for page ``page_no``."""
    return {
        "MasterHotelId": 60182899,
        "NeedFilter": True,
        # Previously hard-coded to 2, so every iteration fetched the same page.
        "PageNo": page_no,
        "PageSize": 10,
        "ServerData": "",
        "UnUsefulPageNo": 1,
        "UnUsefulPageSize": 5,
        "genKeyParam": {
            "a": 60182899,
            "d": "zh-cn",
            "e": 2,
        },
        "genk": True,
        "head": {
            "AID": "1315",
            "ClientID": "09031155110937841563",
            "Currency": "CNY",
            "Device": "PC",
            "Frontend": {
                "pvid": "1",
                "sessionID": "56",
                "vid": "1688730950066.2ds6m2",
            },
            "Group": "ctrip",
            "HotelExtension": {
                "WebpSupport": True,
                "group": "CTRIP",
                "hasAidInUrl": False,
                "hotelUuidKey": "4dAwTsYlAe76ioNKLteS9E7pIkHWnOw1jNYBtjo9K6TjQ7YqlETvtfyQv9YPEG7rp6jpLeNQEzcjDNW58WZcvB5ekYFhwUUK1fwLQYcTw8hwzpjMJXOjo9w04vP6jfJbtj9ljozvPkjSJ9ny8BWlZyFzjfFvDhe8bYFcjPpy3TjgXwddKhYdnEO3jNqJzSyAOj1ZvsLEPfvaOWpfjGHImpKfjzY14Ip5R0NvQTY1TifPwSPRanEpTWt1yNAK5gWGYsbeatYLGWdaxbFES4KMYM1xdHRqqKaDwB3KOMJTY8sInbxaQRf5vFnYoBylsjpov5De9gY8GjoXyLJldvG1YktyzhjbTv37eHcYb7jLHygJFnYDavNFW0BWm3I4lJAhWSYccK9oxPSR9Mvn5ezXYLdi36YZFj0v1keHYonxXyFsJkUiZQjMZxBYk5xTBwcpJ3aecE36e7Yl4Ka6e78wSbYhBifFiAbilXjo3r4qxZQjhYQAegpiQXwaPyqaYgHYtAwftyUovzTYs1wsbED8KlBJnYPZiXTw7FRk5Rfpy5LYZ0EPdWOLilXJofw4QiD9y3AEsqJNbWMfIpZRp8WPYkmxd1inmwahIZPJ3DWkYZDrzliNNKqnRkNwcNjoHjfayQDi1tWPlv9AJh9WpDWlSwZQwOoWfmyNJDYFOYlnYt4wspjaGw9fvZmjbHKfFj0syFY5GKN9J8bImqRsqYUqj43WBben0y3gYptWSfRD1JLoi7YAsWMOi6jOzRfUYb3j91WzBeOcwX6vqbW3cvAqI5QRDY5LjgPy5DRtqR9mytNY4TEO6WbHiHBJhqJDHipoEt3RfgWGcJh4Yqmy9Sr0YnEA5EffxFTEGmjFSWGdWzPWT3Ya5YQgYFSRGnYq1WpMY87Y45YODjzLesXEHNW7UeP1wUHeT4jfoYFAyAoEgAj8NEocr47jGowl7ydoj6ziG0WSYA6R8FWpmW6BWzgWn6YqYbwTjszEtfvmtEUzWF1y3QjoJTQvoMEz7W97yALjgXrcGwXbJfYfkKa7YTMencEQTEohE9ORXOEqoe4XInnxgYsmJFDWn8vc8YO7EolEO8YbmYAdYokYgDwDOv4D",
            },
            "IsQuickBooking": "",
            "Locale": "zh-CN",
            "OUID": "",
            "P": "42196778658",
            "PageID": "102003",
            "ReferenceID": "",
            "SID": "1535",
            "Ticket": "",
            "TimeZone": "8",
            "UID": "",
            "UserIP": "112.45.96.62",
            "UserRegion": "CN",
            "Version": "",
        },
        "isHasFold": False,
        "ssr": False,
    }


def get_request_page(page_numbers=range(1, 2), timeout=10):
    """Fetch hotel-review pages from the Ctrip review API and print each body.

    For every page number a JSON POST is sent to the GetReviewList endpoint
    and the raw response text is printed. A failure on one page is printed
    and the loop moves on to the next page.

    Args:
        page_numbers: Iterable of page numbers to request; each value is sent
            as the ``PageNo`` field. Defaults to the single page 1.
        timeout: Seconds passed to ``requests.post`` as ``timeout`` so a
            stalled connection cannot hang the scrape.

    Returns:
        None.

    Side effects:
        Creates or truncates ``reviews.csv`` in the working directory and
        prints progress and response bodies to standard output.
    """
    headers = _build_review_headers()
    url = "https://m.ctrip.com/restapi/soa2/21881/json/GetReviewList?testab=a9ce8a571551a3f220d0b807d108ce084d422636d1bfdaad29a121443ba5ac75"
    # The file is opened (and therefore truncated) up front, but nothing is
    # written to it yet.
    # TODO: parse the JSON body and write one "label<TAB>content" row per
    # review to f; that step was left disabled and the response schema has
    # not been verified here.
    with open(file="reviews.csv", mode="w", encoding="utf8") as f:
        for page_no in page_numbers:
            print(f"正在爬取第 {page_no} 页 ... ")
            try:
                page_list = requests.post(
                    url=url,
                    json=_build_review_payload(page_no),
                    headers=headers,
                    timeout=timeout,
                )
                print(page_list.text)
            except Exception as ex:
                # Deliberate per-page boundary: report the failure and keep
                # going so one bad page does not abort the whole run.
                print(ex)
                print("------------------------------------------------------------------")

    print("搞定")







# Run the scrape whenever this file is executed or imported.
# NOTE(review): get_request_page() has no return statement, so the name
# below is always bound to None.
reponse_text =  get_request_page()



